package com.data.source.api

import org.apache.spark.sql.SparkSession

object ParquetApp {
  /**
   * Example entry point: reads a Parquet file, prints its schema and rows,
   * then writes a two-column projection back out as JSON.
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("parquetApp").master("local[2]").getOrCreate()

    // Wrap all work in try/finally so the SparkSession is released even if
    // reading or writing throws (the original leaked the session on failure).
    try {
      // If no format is specified, Parquet is the default data source.
      /**
       * Data sources supported by Spark:
       * built-in: json, parquet, jdbc, csv
       * third-party packages: not bundled with Spark, see https://spark-packages.org/
       */
      val df = spark.read.format("parquet").load("file:///Users/username/workspace_code/learn/spark-learn/datasets/users.parquet")
      df.printSchema()
      df.show()

      // Project two columns and persist the result as JSON.
      val selectDf = df.select(df.col("name"), df.col("favorite_color"))
      selectDf.write.format("json").save("file:///Users/username/workspace_code/learn/spark-learn/datasets/tmp/users.json")
    } finally {
      spark.stop()
    }
  }
}
